library(readr)
library(tidyr)
library(dplyr)
library(ggplot2)
library(lubridate)
library(UsingR)
library(ggcorrplot)
library(usmap)
library(PerformanceAnalytics)
library(ggcorrplot)
library(vcd)
library(corrr)
library(tidyverse)
library(rcompanion)
# Cmd+Option+I (RStudio keyboard shortcut on macOS: insert a new code chunk).
# Load the CDC provisional death counts.
# When the column types are guessed, readr reads `Year` and `Month` as logical
# and then reports a parsing failure for every real value such as 2020, which
# leaves both columns entirely NA. Declaring them as numeric avoids that; all
# other columns are still guessed as before.
df0 <- read_csv(
  "Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv",
  col_types = cols(Year = col_double(), Month = col_double())
)
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────[39m
cols(
`Data as of` = [31mcol_character()[39m,
`Start Date` = [31mcol_character()[39m,
`End Date` = [31mcol_character()[39m,
Group = [31mcol_character()[39m,
Year = [33mcol_logical()[39m,
Month = [33mcol_logical()[39m,
`HHS Region` = [32mcol_double()[39m,
State = [31mcol_character()[39m,
`Place of Death` = [31mcol_character()[39m,
`Age group` = [31mcol_character()[39m,
`COVID-19 Deaths` = [32mcol_double()[39m,
`Total Deaths` = [32mcol_double()[39m,
`Pneumonia Deaths` = [32mcol_double()[39m,
`Pneumonia and COVID-19 Deaths` = [32mcol_double()[39m,
`Influenza Deaths` = [32mcol_double()[39m,
`Pneumonia, Influenza, or COVID-19 Deaths` = [32mcol_double()[39m,
Footnote = [31mcol_character()[39m
)
183708 parsing failures.
row col expected actual file
4375 Year 1/0/T/F/TRUE/FALSE 2020 'Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv'
4376 Year 1/0/T/F/TRUE/FALSE 2020 'Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv'
4377 Year 1/0/T/F/TRUE/FALSE 2020 'Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv'
4378 Year 1/0/T/F/TRUE/FALSE 2020 'Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv'
4379 Year 1/0/T/F/TRUE/FALSE 2020 'Provisional_COVID-19_Deaths_by_Place_of_Death_and_Age.csv'
.... .... .................. ...... ...........................................................
See problems(...) for more details.
df0 %>% head()
# Drop the Year and Month columns. They are selected by name rather than by
# position (5 and 6) so the step keeps working if the file layout changes;
# the reporting period is still described by `Start Date` / `End Date`.
df1 <- df0[, setdiff(colnames(df0), c("Year", "Month"))]
df1 %>% head()
# `Data as of` holds a single value (every row was last updated at the same
# time), so it carries no information and is dropped as well.
unique(df1["Data as of"])
df2 <- df1[, setdiff(colnames(df1), "Data as of")]
df2 %>% head()
colnames(df2)
[1] "Start Date"
[2] "End Date"
[3] "Group"
[4] "HHS Region"
[5] "State"
[6] "Place of Death"
[7] "Age group"
[8] "COVID-19 Deaths"
[9] "Total Deaths"
[10] "Pneumonia Deaths"
[11] "Pneumonia and COVID-19 Deaths"
[12] "Influenza Deaths"
[13] "Pneumonia, Influenza, or COVID-19 Deaths"
[14] "Footnote"
dim(df2)
[1] 104976 14
# Open the documentation for converting character strings to Date objects.
help("as.Date.character")
# Look at the first `Start Date` entry (column 1 of df2) to see how dates are written.
df2[1,1]
# Keep the `Start Date` column on its own as a one-column table.
dates<-df2[,1]
# view column data class: simple trial
class(df2$`Start Date`)
[1] "character"
# Trial conversion of `Start Date` from text to Date.
# The dates are written month/day/year (the real conversion further down uses
# "%m/%d/%Y"). Parsing them as "%d/%m/%Y" only looks right for 01/01/2020:
# it turns 02/01/2020 into 2 January and any day above 12 into NA.
dateOnly <- as.Date(df2$`Start Date`, format = "%m/%d/%Y")
dateOnly
[1] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[7] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[13] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[19] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[25] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[31] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[37] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[43] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[49] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[55] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[61] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[67] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[73] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[79] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[85] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[91] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[97] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[103] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[109] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[115] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[121] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[127] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[133] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[139] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[145] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[151] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[157] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[163] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[169] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[175] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[181] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[187] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[193] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[199] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[205] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[211] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[217] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[223] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[229] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[235] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[241] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[247] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[253] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[259] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[265] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[271] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[277] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[283] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[289] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[295] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[301] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[307] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[313] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[319] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[325] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[331] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[337] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[343] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[349] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[355] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[361] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[367] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[373] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[379] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[385] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[391] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[397] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[403] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[409] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[415] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[421] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[427] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[433] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[439] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[445] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[451] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[457] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[463] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[469] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[475] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[481] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[487] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[493] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[499] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[505] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[511] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[517] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[523] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[529] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[535] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[541] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[547] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[553] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[559] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[565] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[571] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[577] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[583] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[589] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[595] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[601] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[607] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[613] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[619] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[625] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[631] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[637] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[643] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[649] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[655] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[661] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[667] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[673] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[679] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[685] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[691] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[697] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[703] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[709] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[715] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[721] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[727] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[733] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[739] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[745] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[751] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[757] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[763] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[769] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[775] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[781] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[787] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[793] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[799] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[805] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[811] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[817] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[823] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[829] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[835] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[841] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[847] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[853] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[859] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[865] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[871] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[877] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[883] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[889] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[895] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[901] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[907] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[913] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[919] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[925] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[931] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[937] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[943] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[949] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[955] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[961] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[967] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[973] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[979] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[985] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[991] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[997] "2020-01-01" "2020-01-01" "2020-01-01" "2020-01-01"
[ reached 'max' / getOption("max.print") -- omitted 103976 entries ]
class(dateOnly)
[1] "Date"
# Parse the two date columns, stored as month/day/year text, into Date objects.
# The intermediate tables are printed after each step.
df3 <- df2
df3[["Start Date"]] <- as.Date(df2[["Start Date"]], format = "%m/%d/%Y")
df3
df3[["End Date"]] <- as.Date(df2[["End Date"]], format = "%m/%d/%Y")
df3
# Turn the categorical columns in positions 3 to 7 ("Group", "HHS Region",
# "State", "Place of Death", "Age group") into factors.
df4 <- df3
col_names <- colnames(df4)[3:7]
df4[col_names] <- lapply(df4[col_names], factor)
df4
# `df` is the cleaned table used by the rest of the analysis.
df <- df4
df
# function to plot: pick the data set
# Scatter plot of one measure against `Start Date`, coloured by a category.
#
# dataframe     : table that provides the `Start Date` column.
# variable      : vector of y values (callers pass a column such as
#                 df$`Total Deaths`); it is used as given, not looked up by
#                 name in `dataframe`.
# name_variable : text used as the y-axis title.
# Chosen_Factor : vector used to colour the points; it is wrapped in factor().
#
# Returns the ggplot object.
LevelPlots <- function(dataframe, variable, name_variable, Chosen_Factor) {
  base_plot <- ggplot(data = dataframe, aes(x = `Start Date`, y = variable))
  coloured_points <- geom_point(aes(colour = factor(Chosen_Factor)))
  axis_titles <- labs(x = "Start Date", y = name_variable)
  base_plot + coloured_points + axis_titles
}
#
# Histogram of one measure, with bars filled according to their count.
#
# dataframe     : table handed to ggplot() as the plot data.
# variable      : vector of values to bin (callers pass a column such as
#                 df$`Total Deaths`).
# name_variable : text used as the plot title.
# breaks        : bin edges. The default reproduces the previous hard-coded
#                 bins (20 to 50 in steps of 2). Values outside the edges are
#                 not drawn, so pass wider breaks for measures on a larger
#                 scale.
#
# Returns the ggplot object.
HistoPlots <- function(dataframe, variable, name_variable,
                       breaks = seq(20, 50, by = 2)) {
  ggplot(data = dataframe, aes(variable)) +
    geom_histogram(
      breaks = breaks,
      colour = "red",
      # after_stat(count) replaces the deprecated `..count..` notation
      # (requires ggplot2 >= 3.3.0).
      aes(fill = after_stat(count))
    ) +
    labs(title = name_variable)
}
# Exploratory plots on the full table `df` (aggregate rows included).
# For each measure: one scatter plot against Start Date per categorical column
# (Group, HHS Region, State, Place of Death, Age group), then one histogram.
# Total deaths:
LevelPlots(df, df$`Total Deaths`,"Total Deaths",df$Group)
LevelPlots(df, df$`Total Deaths`,"Total Deaths",df$`HHS Region`)
LevelPlots(df, df$`Total Deaths`,"Total Deaths",df$State)
LevelPlots(df, df$`Total Deaths`,"Total Deaths",df$`Place of Death`)
LevelPlots(df, df$`Total Deaths`,"Total Deaths",df$`Age group`)
HistoPlots(df, df$`Total Deaths`,"Total Deaths")
# "Pneumonia Deaths"
LevelPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths",df$Group)
LevelPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths",df$`HHS Region`)
LevelPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths",df$State)
LevelPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths",df$`Place of Death`)
LevelPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths",df$`Age group`)
HistoPlots(df, df$`Pneumonia Deaths`,"Pneumonia Deaths")
# "Pneumonia and COVID-19 Deaths"
LevelPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df$Group)
LevelPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df$`HHS Region`)
LevelPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df$State)
LevelPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df$`Place of Death`)
LevelPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df$`Age group`)
HistoPlots(df, df$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths")
# "Influenza Deaths"
LevelPlots(df, df$`Influenza Deaths`,"Influenza Deaths",df$Group)
LevelPlots(df, df$`Influenza Deaths`,"Influenza Deaths",df$`HHS Region`)
LevelPlots(df, df$`Influenza Deaths`,"Influenza Deaths",df$State)
LevelPlots(df, df$`Influenza Deaths`,"Influenza Deaths",df$`Place of Death`)
LevelPlots(df, df$`Influenza Deaths`,"Influenza Deaths",df$`Age group`)
HistoPlots(df, df$`Influenza Deaths`,"Influenza Deaths")
# "Pneumonia, Influenza, or COVID-19 Deaths"
LevelPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths",df$Group)
LevelPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths",df$`HHS Region`)
LevelPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths",df$State)
LevelPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths",df$`Place of Death`)
LevelPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths",df$`Age group`)
HistoPlots(df, df$`Pneumonia, Influenza, or COVID-19 Deaths`,"Pneumonia, Influenza, or COVID-19 Deaths")
# trying to understand the start/end date part:
unique(df$`End Date`)
[1] "2021-09-25" "2020-12-31" "2020-01-31" "2020-02-29" "2020-03-31" "2020-04-30"
[7] "2020-05-31" "2020-06-30" "2020-07-31" "2020-08-31" "2020-09-30" "2020-10-31"
[13] "2020-11-30" "2021-01-31" "2021-02-28" "2021-03-31" "2021-04-30" "2021-05-31"
[19] "2021-06-30" "2021-07-31" "2021-08-31"
unique(df$`Start Date`)
[1] "2020-01-01" "2021-01-01" "2020-02-01" "2020-03-01" "2020-04-01" "2020-05-01"
[7] "2020-06-01" "2020-07-01" "2020-08-01" "2020-09-01" "2020-10-01" "2020-11-01"
[13] "2020-12-01" "2021-02-01" "2021-03-01" "2021-04-01" "2021-05-01" "2021-06-01"
[19] "2021-07-01" "2021-08-01" "2021-09-01"
unique(df$`Age group`) # maybe drop the first value listed ("All Ages")
[1] All Ages 0-17 years 18-29 years 30-39 years
[5] 40-49 years 50-64 years 65-74 years 75-84 years
[9] 85 years and over
9 Levels: 0-17 years 18-29 years 30-39 years 40-49 years 50-64 years ... All Ages
unique(df$Group)
[1] By Total By Year By Month
Levels: By Month By Total By Year
unique(df$`HHS Region`)
[1] 0 4 10 9 6 8 1 3 5 7 2
Levels: 0 1 2 3 4 5 6 7 8 9 10
unique(df$State) # maybe drop the first value listed ("United States")
[1] United States Alabama Alaska Arizona
[5] Arkansas California Colorado Connecticut
[9] Delaware District of Columbia Florida Georgia
[13] Hawaii Idaho Illinois Indiana
[17] Iowa Kansas Kentucky Louisiana
[21] Maine Maryland Massachusetts Michigan
[25] Minnesota Mississippi Missouri Montana
[29] Nebraska Nevada New Hampshire New Jersey
[33] New Mexico New York New York City North Carolina
[37] North Dakota Ohio Oklahoma Oregon
[41] Pennsylvania Rhode Island South Carolina South Dakota
[45] Tennessee Texas Utah Vermont
[49] Virginia Washington West Virginia Wisconsin
[53] Wyoming Puerto Rico
54 Levels: Alabama Alaska Arizona Arkansas California Colorado Connecticut ... Wyoming
unique(df$`Place of Death`)# maybe drop the first value listed ("Total - All Places of Death")
[1] Total - All Places of Death
[2] Healthcare setting, inpatient
[3] Healthcare setting, outpatient or emergency room
[4] Healthcare setting, dead on arrival
[5] Decedent's home
[6] Hospice facility
[7] Nursing home/long term care facility
[8] Other
[9] Place of death unknown
9 Levels: Decedent's home ... Total - All Places of Death
# Keep only the detailed rows: remove the "All Ages", "United States" and
# "Total - All Places of Death" entries. Conditions separated by commas in
# filter() must all hold for a row to be kept.
df_specific <- df %>%
  filter(
    `Age group` != "All Ages",
    State != "United States",
    `Place of Death` != "Total - All Places of Death"
  )
df_specific
###
# Same scatter plots as before, now on `df_specific` (rows for "All Ages",
# "United States" and "Total - All Places of Death" removed).
# Total deaths:
# scatter through time:
LevelPlots(df_specific, df_specific$`Total Deaths`,"Total Deaths",df_specific$Group)
LevelPlots(df_specific, df_specific$`Total Deaths`,"Total Deaths",df_specific$`HHS Region`)
LevelPlots(df_specific, df_specific$`Total Deaths`,"Total Deaths",df_specific$State)
LevelPlots(df_specific, df_specific$`Total Deaths`,"Total Deaths",df_specific$`Place of Death`)
LevelPlots(df_specific, df_specific$`Total Deaths`,"Total Deaths",df_specific$`Age group`)
# Remarks:
# "Pneumonia Deaths"
LevelPlots(df_specific, df_specific$`Pneumonia Deaths`,"Pneumonia Deaths",df_specific$Group)
LevelPlots(df_specific, df_specific$`Pneumonia Deaths`,"Pneumonia Deaths",df_specific$`HHS Region`)
LevelPlots(df_specific, df_specific$`Pneumonia Deaths`,"Pneumonia Deaths",df_specific$State)
LevelPlots(df_specific, df_specific$`Pneumonia Deaths`,"Pneumonia Deaths",df_specific$`Place of Death`)
LevelPlots(df_specific, df_specific$`Pneumonia Deaths`,"Pneumonia Deaths",df_specific$`Age group`)
# "Pneumonia and COVID-19 Deaths"
LevelPlots(df_specific, df_specific$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df_specific$Group)
LevelPlots(df_specific, df_specific$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df_specific$`HHS Region`)
LevelPlots(df_specific, df_specific$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df_specific$State)
LevelPlots(df_specific, df_specific$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df_specific$`Place of Death`)
LevelPlots(df_specific, df_specific$`Pneumonia and COVID-19 Deaths`,"Pneumonia and COVID-19 Deaths",df_specific$`Age group`)
# "Influenza Deaths"
LevelPlots(df_specific, df_specific$`Influenza Deaths`,"Influenza Deaths",df_specific$Group)
LevelPlots(df_specific, df_specific$`Influenza Deaths`,"Influenza Deaths",df_specific$`HHS Region`)
LevelPlots(df_specific, df_specific$`Influenza Deaths`,"Influenza Deaths",df_specific$State)
LevelPlots(df_specific, df_specific$`Influenza Deaths`,"Influenza Deaths",df_specific$`Place of Death`)
LevelPlots(df_specific, df_specific$`Influenza Deaths`,"Influenza Deaths",df_specific$`Age group`)
NA
NA
NA
# now same as last week but standardised data: deaths per 10000 people
list_states <- unique(df_specific$State)

# Per-state sums of each measure (missing counts are ignored).
# NOTE(review): df_specific still contains the "By Total", "By Year" and
# "By Month" groups, so these sums add up overlapping reporting periods --
# confirm whether a filter on `Group` is needed before summing.
TotalDeath_by_state <- df_specific %>%
  group_by(State) %>%
  summarise(`Total Deaths` = sum(`Total Deaths`, na.rm = TRUE))
PneumoniaDeath <- df_specific %>%
  group_by(State) %>%
  summarise(`Pneumonia Deaths` = sum(`Pneumonia Deaths`, na.rm = TRUE))
PneumoniaCOVIDDeaths <- df_specific %>%
  group_by(State) %>%
  summarise(
    `Pneumonia and COVID-19 Deaths` =
      sum(`Pneumonia and COVID-19 Deaths`, na.rm = TRUE)
  )
InfluenzaDeaths <- df_specific %>%
  group_by(State) %>%
  summarise(`Influenza Deaths` = sum(`Influenza Deaths`, na.rm = TRUE))
PneumoniaInfluenza_or_COVIDDeaths <- df_specific %>%
  group_by(State) %>%
  summarise(
    `Pneumonia, Influenza, or COVID-19 Deaths` =
      sum(`Pneumonia, Influenza, or COVID-19 Deaths`, na.rm = TRUE)
  )

# omitting for better view New York City and Puerto Rico
# The rows are removed by name rather than by position (34 and 41). This does
# not depend on the row order, and re-running the block no longer deletes two
# further states from TotalDeath_by_state.
excluded_jurisdictions <- c("New York City", "Puerto Rico")
TotalDeath_by_state <- TotalDeath_by_state %>%
  filter(!State %in% excluded_jurisdictions)
PneumoniaDeath_by_state <- PneumoniaDeath %>%
  filter(!State %in% excluded_jurisdictions)
PneumoniaCOVIDDeaths_by_state <- PneumoniaCOVIDDeaths %>%
  filter(!State %in% excluded_jurisdictions)
InfluenzaDeaths_by_state <- InfluenzaDeaths %>%
  filter(!State %in% excluded_jurisdictions)
PneumoniaInfluenza_or_COVIDDeaths_by_state <- PneumoniaInfluenza_or_COVIDDeaths %>%
  filter(!State %in% excluded_jurisdictions)
# standardising the data per 10,000 people
us_popul<-statepop
# Convert per-state counts into counts per 10,000 people.
#
# data       : table whose first column holds the state name and whose second
#              column holds the count to rescale.
# population : table with a `full` column (state name) and a `pop_2015` column
#              (population). Defaults to the global `us_popul`, which is what
#              the previous version always used.
#
# Rows are aligned on the state name instead of on row position, so the result
# stays correct if the two tables are ordered differently. A state missing
# from `population` gets NA and triggers a warning.
#
# Returns `data` with its second column rescaled.
standardise_pop <- function(data, population = us_popul) {
  state_names <- as.character(data[[1]])
  population_row <- match(state_names, as.character(population$full))
  if (anyNA(population_row)) {
    warning(
      "No population found for: ",
      paste(state_names[is.na(population_row)], collapse = ", ")
    )
  }
  data[[2]] <- data[[2]] * 10000 / population$pop_2015[population_row]
  data
}
# Rescale every per-state table with standardise_pop() (count * 10000 /
# pop_2015) and print the result. Each table is overwritten with its rescaled
# version, so running these lines a second time rescales the values again.
TotalDeath_by_state<-standardise_pop(TotalDeath_by_state)
TotalDeath_by_state
PneumoniaDeath_by_state<-standardise_pop(PneumoniaDeath_by_state)
PneumoniaDeath_by_state
PneumoniaCOVIDDeaths_by_state<-standardise_pop(PneumoniaCOVIDDeaths_by_state)
PneumoniaCOVIDDeaths_by_state
InfluenzaDeaths_by_state<-standardise_pop(InfluenzaDeaths_by_state)
InfluenzaDeaths_by_state
PneumoniaInfluenza_or_COVIDDeaths_by_state<-standardise_pop(PneumoniaInfluenza_or_COVIDDeaths_by_state)
PneumoniaInfluenza_or_COVIDDeaths_by_state
# all fine now: element-wise check that the state names in the population
# table and in the deaths table appear in the same order
us_popul$full==PneumoniaInfluenza_or_COVIDDeaths_by_state$State
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[18] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[35] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# right datasets
# Each map table is a copy of the population table whose `pop_2015` column is
# overwritten with one standardised measure, so that plot_usmap() can be
# pointed at the same column name every time.
us_TotalDeath <- us_popul
us_TotalDeath$pop_2015 <- TotalDeath_by_state$`Total Deaths`
# Fixed: this table used to be filled with the "Pneumonia and COVID-19 Deaths"
# rates, which made the pneumonia map a duplicate of the next one.
us_PneumoniaDeath <- us_popul
us_PneumoniaDeath$pop_2015 <- PneumoniaDeath_by_state$`Pneumonia Deaths`
us_PneumoniaCOVIDDeaths <- us_popul
us_PneumoniaCOVIDDeaths$pop_2015 <- PneumoniaCOVIDDeaths_by_state$`Pneumonia and COVID-19 Deaths`
us_InfluenzaDeaths <- us_popul
us_InfluenzaDeaths$pop_2015 <- InfluenzaDeaths_by_state$`Influenza Deaths`
us_PneumoniaInfluenza_or_COVIDDeaths <- us_popul
us_PneumoniaInfluenza_or_COVIDDeaths$pop_2015 <- PneumoniaInfluenza_or_COVIDDeaths_by_state$`Pneumonia, Influenza, or COVID-19 Deaths`
# plots
# One US map per measure. The value to draw is always read from the
# `pop_2015` column of the table passed as `data`; the legend title names the
# table being shown.
plot_usmap(data = us_TotalDeath, values = "pop_2015", color = "red") +
  scale_fill_continuous(name = "us_TotalDeath", labels = scales::comma) +
  theme(legend.position = "right")
plot_usmap(data = us_PneumoniaDeath, values = "pop_2015", color = "red") +
  scale_fill_continuous(name = "us_PneumoniaDeath", labels = scales::comma) +
  theme(legend.position = "right")
# Fixed: the legend title carried a stray closing parenthesis.
plot_usmap(data = us_PneumoniaCOVIDDeaths, values = "pop_2015", color = "red") +
  scale_fill_continuous(name = "us_PneumoniaCOVIDDeaths", labels = scales::comma) +
  theme(legend.position = "right")
plot_usmap(data = us_InfluenzaDeaths, values = "pop_2015", color = "red") +
  scale_fill_continuous(name = "us_InfluenzaDeaths", labels = scales::comma) +
  theme(legend.position = "right")
plot_usmap(data = us_PneumoniaInfluenza_or_COVIDDeaths, values = "pop_2015", color = "red") +
  scale_fill_continuous(name = "us_PneumoniaInfluenza_or_COVIDDeaths", labels = scales::comma) +
  theme(legend.position = "right")
NA
NA
NA
# helpers to filter the data by one category at a time:
# Keep the rows of `DataFrame` whose `Age group` equals `agegroup`.
#
# DataFrame : table with an `Age group` column.
# agegroup  : single value to keep, e.g. "0-17 years".
#
# The requested value is validated against the table that was passed in
# (its factor levels, or its distinct values when the column is not a factor)
# instead of the global `df`, so the helper works on any table. If the value
# is unknown, a warning is raised and `DataFrame` is returned unfiltered.
# Rows where the column is NA are dropped, as filter() would do.
Select_Age_Group <- function(DataFrame, agegroup) {
  column <- DataFrame[["Age group"]]
  known_values <- if (is.factor(column)) levels(column) else unique(column)
  if (!(agegroup %in% known_values)) {
    warning("Age group selected not in the list, the returned dataframe has not been filtered")
    return(DataFrame)
  }
  DataFrame[which(column == agegroup), , drop = FALSE]
}
#Test:
#Select_Age_Group(df,"0-17 years")
# Keep the rows of `DataFrame` whose `Group` equals `group`.
#
# DataFrame : table with a `Group` column.
# group     : single value to keep, e.g. "By Total".
#
# The requested value is validated against the table that was passed in
# (its factor levels, or its distinct values when the column is not a factor)
# instead of the global `df`, so the helper works on any table. If the value
# is unknown, a warning is raised and `DataFrame` is returned unfiltered.
# Rows where the column is NA are dropped, as filter() would do.
Select_Group <- function(DataFrame, group) {
  column <- DataFrame[["Group"]]
  known_values <- if (is.factor(column)) levels(column) else unique(column)
  if (!(group %in% known_values)) {
    warning("Group selected not in the list, the returned dataframe has not been filtered")
    return(DataFrame)
  }
  DataFrame[which(column == group), , drop = FALSE]
}
#Test:
#Select_Group(df, "By Total")
# Keep the rows of `DataFrame` whose `HHS Region` equals `region`.
#
# DataFrame : table with an `HHS Region` column.
# region    : single value to keep, e.g. 4.
#
# The requested value is validated against the table that was passed in
# (its factor levels, or its distinct values when the column is not a factor)
# instead of the global `df`, so the helper works on any table. If the value
# is unknown, a warning is raised and `DataFrame` is returned unfiltered.
# Rows where the column is NA are dropped, as filter() would do.
Select_HHSRegion <- function(DataFrame, region) {
  column <- DataFrame[["HHS Region"]]
  known_values <- if (is.factor(column)) levels(column) else unique(column)
  if (!(region %in% known_values)) {
    warning("HHS Region selected not in the list, the returned dataframe has not been filtered")
    return(DataFrame)
  }
  DataFrame[which(column == region), , drop = FALSE]
}
#Test:
# Select_HHSRegion(df,4)
# Select_HHSRegion(df,-1)
# Keep the rows of `DataFrame` whose `State` equals `state`.
#
# DataFrame : table with a `State` column.
# state     : single value to keep, e.g. "Hawaii".
#
# The requested value is validated against the table that was passed in
# (its factor levels, or its distinct values when the column is not a factor)
# instead of the global `df`, so the helper works on any table. If the value
# is unknown, a warning is raised and `DataFrame` is returned unfiltered.
# Rows where the column is NA are dropped, as filter() would do.
Select_State <- function(DataFrame, state) {
  column <- DataFrame[["State"]]
  known_values <- if (is.factor(column)) levels(column) else unique(column)
  if (!(state %in% known_values)) {
    warning("State selected not in the list, the returned dataframe has not been filtered")
    return(DataFrame)
  }
  DataFrame[which(column == state), , drop = FALSE]
}
#Test:
#Select_State(df,"Hawaii")
#Select_State(df,-1)
# Keep the rows of `DataFrame` whose `Place of Death` equals `place_d`.
#
# DataFrame : table with a `Place of Death` column.
# place_d   : single value to keep, e.g. "Healthcare setting, inpatient".
#
# The requested value is validated against the table that was passed in
# (its factor levels, or its distinct values when the column is not a factor)
# instead of the global `df`, so the helper works on any table. If the value
# is unknown, a warning is raised and `DataFrame` is returned unfiltered.
# Rows where the column is NA are dropped, as filter() would do.
Select_PlaceDeath <- function(DataFrame, place_d) {
  column <- DataFrame[["Place of Death"]]
  known_values <- if (is.factor(column)) levels(column) else unique(column)
  if (!(place_d %in% known_values)) {
    warning("Place of death selected not in the list, the returned dataframe has not been filtered")
    return(DataFrame)
  }
  DataFrame[which(column == place_d), , drop = FALSE]
}
#Test:
# Select_PlaceDeath(df,"Healthcare setting, inpatient")
# Select_PlaceDeath(df,-1)
# Apply the five single-category filters one after another, in the order
# age group, group, HHS region, state, place of death.
# Each step hands its result to the next. A step whose value is rejected by
# its helper raises a warning and passes the table on unchanged.
Select_all <- function(DataFrame, agegroup, group, region, state, place_d) {
  by_age <- Select_Age_Group(DataFrame, agegroup)
  by_group <- Select_Group(by_age, group)
  by_region <- Select_HHSRegion(by_group, region)
  by_state <- Select_State(by_region, state)
  by_place <- Select_PlaceDeath(by_state, place_d)
  by_place
}
# eventually gives you 0 or 1 column
#Select_all(df,"0-17 years","By Total",-1,"California","Healthcare setting, inpatient")
#Tests:
Select_PlaceDeath(df,"Healthcare setting, inpatient")
Select_PlaceDeath(df,-1)
Place of death selected not in the list, the returned dataframe has not been filtered
Select_all(df,"0-17 years","By Total",-1,"California","Healthcare setting, inpatient")
HHS Region selected not in the list, the returned dataframe has not been filtered
Select_State(df,"Hawaii")
Select_State(df,-1)
State selected not in the list, the returned dataframe has not been filtered
Select_HHSRegion(df,4)
Select_HHSRegion(df,-1)
HHS Region selected not in the list, the returned dataframe has not been filtered
Select_Group(df, "By Total")
Select_Age_Group(df,"0-17 years")
# One table per age group, reduced to columns 8 to 13 of `df` (the six
# death-count columns), as input for the correlation charts.
df_corr_agegroups1 <- Select_Age_Group(df, "All Ages")[, 8:13]
df_corr_agegroups2 <- Select_Age_Group(df, "0-17 years")[, 8:13]
df_corr_agegroups3 <- Select_Age_Group(df, "18-29 years")[, 8:13]
df_corr_agegroups4 <- Select_Age_Group(df, "30-39 years")[, 8:13]
df_corr_agegroups5 <- Select_Age_Group(df, "40-49 years")[, 8:13]
df_corr_agegroups6 <- Select_Age_Group(df, "50-64 years")[, 8:13]
df_corr_agegroups7 <- Select_Age_Group(df, "65-74 years")[, 8:13]
df_corr_agegroups8 <- Select_Age_Group(df, "75-84 years")[, 8:13]
df_corr_agegroups9 <- Select_Age_Group(df, "85 years and over")[, 8:13]
# # install.packages("PerformanceAnalytics")
library(PerformanceAnalytics)
# Draw one Pearson correlation chart (histogram = TRUE) per age-group table,
# in the same order as before. The charts are drawn as a side effect, so the
# list returned by lapply() is discarded.
invisible(lapply(
  list(
    df_corr_agegroups1, df_corr_agegroups2, df_corr_agegroups3,
    df_corr_agegroups4, df_corr_agegroups5, df_corr_agegroups6,
    df_corr_agegroups7, df_corr_agegroups8, df_corr_agegroups9
  ),
  chart.Correlation,
  histogram = TRUE,
  method = "pearson"
))
# Correlation heat map over a dummy-coded copy of the data.
# The original call stopped with "contrasts can be applied only to factors
# with 2 or more levels". `Footnote` is the only column still stored as free
# text, and model.matrix() turns text into a factor after dropping incomplete
# rows -- presumably leaving fewer than two distinct footnotes (confirm with
# unique(df$Footnote)). It is a comment field rather than a measure, so it is
# left out. Base subsetting is used because select() may be masked by MASS,
# which is attached through UsingR.
model.matrix(~ 0 + ., data = df[, setdiff(colnames(df), "Footnote")]) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(show.diag = FALSE, type = "lower", lab = TRUE, lab_size = 2)
Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]) :
contrasts can be applied only to factors with 2 or more levels
# Cross-tabulate pairs or triples of columns with vcd::structable() and hand
# each table to pairs() for plotting.
# Group by age group:
st1 <- structable(~ Group + `Age group`, data = df)
# st1
pairs(st1)
# HHS region by state by place of death:
st2 <- structable(~ `HHS Region` + State + `Place of Death`, data = df)
# st2
pairs(st2)
# COVID-19 deaths by pneumonia deaths by age group:
st_age <- structable(~ `COVID-19 Deaths` + `Pneumonia Deaths` + `Age group`, data = df)
pairs(st_age)